; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -O2 --relocation-model=pic --tls-load-hoist=true -o - %s | FileCheck %s --check-prefix=HOIST0
; RUN: llc -mtriple=x86_64-unknown-unknown -O2 --relocation-model=pic -o - %s | FileCheck %s --check-prefix=HOIST2

; This test has no "tls-load-hoist" module flag (e.g. {"tls-load-hoist", i32 0}),
; so the --tls-load-hoist=x command-line option is used to choose how
; thread_local addresses are loaded.

; This test comes from compiling clang/test/CodeGen/intel/tls_loads.cpp with:
; (clang tls_loads.cpp -fPIC -ftls-model=global-dynamic -O2 -S -emit-llvm)

; Comdat declarations carried over from the original module.
; NOTE(review): nothing in the visible IR refers to these comdats - presumably
; leftovers from reducing the clang output; confirm before removing.
$_ZTW5thl_x = comdat any

$_ZTW6thl_x2 = comdat any

; Externally visible thread_local i32 variables. The assertions below expect
; them to be addressed through the @TLSGD sequence around __tls_get_addr.
@thl_x = thread_local global i32 0, align 4
@thl_x2 = thread_local global i32 0, align 4
; Internal thread_local variables used only by @_Z2f2i (an i8 and an i32). The
; assertions below expect a single @TLSLD call to __tls_get_addr plus @DTPOFF
; offsets for these two.
@_ZZ2f2iE2st.0 = internal thread_local unnamed_addr global i8 0, align 4
@_ZZ2f2iE2st.1 = internal thread_local unnamed_addr global i32 0, align 4

; For HOIST0, check that __tls_get_addr@PLT is called only once for each thread_local variable.
; For HOIST2, check the default behavior: __tls_get_addr@PLT is usually called every time a thread_local variable is used.

; Function Attrs: mustprogress uwtable
; @_Z2f1i(i32 %c): counts %c down to zero in a loop (skipped entirely when %c
; is zero). Each iteration loads @thl_x2, passes it to @_Z6gfunc2i, and adds the
; call result into @thl_x. Returns the last value stored to @thl_x, or the
; current value of @thl_x when the loop is skipped.
;
; Expected codegen:
;  - The HOIST0 checks expect one __tls_get_addr call per variable, both outside
;    the loop (thl_x in the entry block, thl_x2 in the loop preheader), with the
;    returned addresses kept in %r14 and %r15 for use inside the loop.
;  - The HOIST2 checks expect __tls_get_addr to be called for both variables
;    inside the loop body, plus once more for thl_x on the skipped-loop path.
define i32 @_Z2f1i(i32 %c) local_unnamed_addr #0 {
; HOIST0-LABEL: _Z2f1i:
; HOIST0:       # %bb.0: # %entry
; HOIST0-NEXT:    pushq %r15
; HOIST0-NEXT:    .cfi_def_cfa_offset 16
; HOIST0-NEXT:    pushq %r14
; HOIST0-NEXT:    .cfi_def_cfa_offset 24
; HOIST0-NEXT:    pushq %rbx
; HOIST0-NEXT:    .cfi_def_cfa_offset 32
; HOIST0-NEXT:    .cfi_offset %rbx, -32
; HOIST0-NEXT:    .cfi_offset %r14, -24
; HOIST0-NEXT:    .cfi_offset %r15, -16
; HOIST0-NEXT:    movl %edi, %ebx
; HOIST0-NEXT:    data16
; HOIST0-NEXT:    leaq thl_x@TLSGD(%rip), %rdi
; HOIST0-NEXT:    data16
; HOIST0-NEXT:    data16
; HOIST0-NEXT:    rex64
; HOIST0-NEXT:    callq __tls_get_addr@PLT
; HOIST0-NEXT:    movq %rax, %r14
; HOIST0-NEXT:    testl %ebx, %ebx
; HOIST0-NEXT:    je .LBB0_4
; HOIST0-NEXT:  # %bb.1: # %while.body.preheader
; HOIST0-NEXT:    data16
; HOIST0-NEXT:    leaq thl_x2@TLSGD(%rip), %rdi
; HOIST0-NEXT:    data16
; HOIST0-NEXT:    data16
; HOIST0-NEXT:    rex64
; HOIST0-NEXT:    callq __tls_get_addr@PLT
; HOIST0-NEXT:    movq %rax, %r15
; HOIST0-NEXT:    .p2align 4, 0x90
; HOIST0-NEXT:  .LBB0_2: # %while.body
; HOIST0-NEXT:    # =>This Inner Loop Header: Depth=1
; HOIST0-NEXT:    movl (%r15), %edi
; HOIST0-NEXT:    callq _Z6gfunc2i@PLT
; HOIST0-NEXT:    addl (%r14), %eax
; HOIST0-NEXT:    movl %eax, (%r14)
; HOIST0-NEXT:    decl %ebx
; HOIST0-NEXT:    jne .LBB0_2
; HOIST0-NEXT:    jmp .LBB0_3
; HOIST0-NEXT:  .LBB0_4: # %entry.while.end_crit_edge
; HOIST0-NEXT:    movl (%r14), %eax
; HOIST0-NEXT:  .LBB0_3: # %while.end
; HOIST0-NEXT:    popq %rbx
; HOIST0-NEXT:    .cfi_def_cfa_offset 24
; HOIST0-NEXT:    popq %r14
; HOIST0-NEXT:    .cfi_def_cfa_offset 16
; HOIST0-NEXT:    popq %r15
; HOIST0-NEXT:    .cfi_def_cfa_offset 8
; HOIST0-NEXT:    retq
;
; HOIST2-LABEL: _Z2f1i:
; HOIST2:       # %bb.0: # %entry
; HOIST2-NEXT:    pushq %rbp
; HOIST2-NEXT:    .cfi_def_cfa_offset 16
; HOIST2-NEXT:    pushq %rbx
; HOIST2-NEXT:    .cfi_def_cfa_offset 24
; HOIST2-NEXT:    pushq %rax
; HOIST2-NEXT:    .cfi_def_cfa_offset 32
; HOIST2-NEXT:    .cfi_offset %rbx, -24
; HOIST2-NEXT:    .cfi_offset %rbp, -16
; HOIST2-NEXT:    testl %edi, %edi
; HOIST2-NEXT:    je .LBB0_4
; HOIST2-NEXT:  # %bb.1:
; HOIST2-NEXT:    movl %edi, %ebx
; HOIST2-NEXT:    .p2align 4, 0x90
; HOIST2-NEXT:  .LBB0_2: # %while.body
; HOIST2-NEXT:    # =>This Inner Loop Header: Depth=1
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    leaq thl_x2@TLSGD(%rip), %rdi
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    rex64
; HOIST2-NEXT:    callq __tls_get_addr@PLT
; HOIST2-NEXT:    movl (%rax), %edi
; HOIST2-NEXT:    callq _Z6gfunc2i@PLT
; HOIST2-NEXT:    movl %eax, %ebp
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    leaq thl_x@TLSGD(%rip), %rdi
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    rex64
; HOIST2-NEXT:    callq __tls_get_addr@PLT
; HOIST2-NEXT:    addl (%rax), %ebp
; HOIST2-NEXT:    movl %ebp, (%rax)
; HOIST2-NEXT:    decl %ebx
; HOIST2-NEXT:    jne .LBB0_2
; HOIST2-NEXT:    jmp .LBB0_3
; HOIST2-NEXT:  .LBB0_4: # %entry.while.end_crit_edge
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    leaq thl_x@TLSGD(%rip), %rdi
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    rex64
; HOIST2-NEXT:    callq __tls_get_addr@PLT
; HOIST2-NEXT:    movl (%rax), %ebp
; HOIST2-NEXT:  .LBB0_3: # %while.end
; HOIST2-NEXT:    movl %ebp, %eax
; HOIST2-NEXT:    addq $8, %rsp
; HOIST2-NEXT:    .cfi_def_cfa_offset 24
; HOIST2-NEXT:    popq %rbx
; HOIST2-NEXT:    .cfi_def_cfa_offset 16
; HOIST2-NEXT:    popq %rbp
; HOIST2-NEXT:    .cfi_def_cfa_offset 8
; HOIST2-NEXT:    retq
entry:
  ; Skip the loop when %c is zero.
  %tobool.not3 = icmp eq i32 %c, 0
  br i1 %tobool.not3, label %entry.while.end_crit_edge, label %while.body

entry.while.end_crit_edge:                        ; preds = %entry
  ; Loop skipped: read @thl_x once so it can be returned from %while.end.
  %.pre = load i32, ptr @thl_x, align 4
  br label %while.end

while.body:                                       ; preds = %entry, %while.body
  ; Remaining iteration count: %c on entry, decremented once per iteration.
  %c.addr.04 = phi i32 [ %dec, %while.body ], [ %c, %entry ]
  %dec = add nsw i32 %c.addr.04, -1
  ; First TLS access in the loop: read @thl_x2 as the call argument.
  %0 = load i32, ptr @thl_x2, align 4
  %call = tail call i32 @_Z6gfunc2i(i32 %0)
  ; Second TLS access: accumulate the call result into @thl_x.
  %1 = load i32, ptr @thl_x, align 4
  %add = add nsw i32 %1, %call
  store i32 %add, ptr @thl_x, align 4
  %tobool.not = icmp eq i32 %dec, 0
  br i1 %tobool.not, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %entry.while.end_crit_edge
  ; Return the value loaded on the skipped-loop path or the last sum stored.
  %2 = phi i32 [ %.pre, %entry.while.end_crit_edge ], [ %add, %while.body ]
  ret i32 %2
}

; External function taking one i32 and returning i32; called from @_Z2f1i and
; @_Z2f3i. Its body is not part of this test.
declare i32 @_Z6gfunc2i(i32) local_unnamed_addr #1

; Function Attrs: mustprogress uwtable
; @_Z2f2i(i32 %c): counts %c down to zero in a loop (skipped entirely when %c
; is zero). Each iteration calls @_Z5gfuncv three times and adds the results
; into @thl_x (i32), @_ZZ2f2iE2st.0 (i8, after truncating the result) and
; @_ZZ2f2iE2st.1 (i32). Returns the value of @thl_x loaded after the loop.
;
; Expected codegen:
;  - The HOIST0 checks expect one @TLSGD call for thl_x in the entry block and
;    one @TLSLD call in the loop preheader; the addresses of the two internal
;    variables are formed from that single result with @DTPOFF offsets, and no
;    __tls_get_addr call remains inside the loop or in %while.end.
;  - The HOIST2 checks expect the @TLSGD call for thl_x inside the loop and
;    again in %while.end, and the @TLSLD call inside the loop (its result is
;    kept in %r14 and reused for the second internal variable).
define i32 @_Z2f2i(i32 %c) local_unnamed_addr #0 {
; HOIST0-LABEL: _Z2f2i:
; HOIST0:       # %bb.0: # %entry
; HOIST0-NEXT:    pushq %r15
; HOIST0-NEXT:    .cfi_def_cfa_offset 16
; HOIST0-NEXT:    pushq %r14
; HOIST0-NEXT:    .cfi_def_cfa_offset 24
; HOIST0-NEXT:    pushq %r12
; HOIST0-NEXT:    .cfi_def_cfa_offset 32
; HOIST0-NEXT:    pushq %rbx
; HOIST0-NEXT:    .cfi_def_cfa_offset 40
; HOIST0-NEXT:    pushq %rax
; HOIST0-NEXT:    .cfi_def_cfa_offset 48
; HOIST0-NEXT:    .cfi_offset %rbx, -40
; HOIST0-NEXT:    .cfi_offset %r12, -32
; HOIST0-NEXT:    .cfi_offset %r14, -24
; HOIST0-NEXT:    .cfi_offset %r15, -16
; HOIST0-NEXT:    movl %edi, %ebx
; HOIST0-NEXT:    data16
; HOIST0-NEXT:    leaq thl_x@TLSGD(%rip), %rdi
; HOIST0-NEXT:    data16
; HOIST0-NEXT:    data16
; HOIST0-NEXT:    rex64
; HOIST0-NEXT:    callq __tls_get_addr@PLT
; HOIST0-NEXT:    movq %rax, %r14
; HOIST0-NEXT:    testl %ebx, %ebx
; HOIST0-NEXT:    je .LBB1_3
; HOIST0-NEXT:  # %bb.1: # %while.body.preheader
; HOIST0-NEXT:    leaq _ZZ2f2iE2st.0@TLSLD(%rip), %rdi
; HOIST0-NEXT:    callq __tls_get_addr@PLT
; HOIST0-NEXT:    movq %rax, %rcx
; HOIST0-NEXT:    leaq _ZZ2f2iE2st.0@DTPOFF(%rax), %r15
; HOIST0-NEXT:    leaq _ZZ2f2iE2st.1@DTPOFF(%rax), %r12
; HOIST0-NEXT:    .p2align 4, 0x90
; HOIST0-NEXT:  .LBB1_2: # %while.body
; HOIST0-NEXT:    # =>This Inner Loop Header: Depth=1
; HOIST0-NEXT:    callq _Z5gfuncv@PLT
; HOIST0-NEXT:    addl %eax, (%r14)
; HOIST0-NEXT:    callq _Z5gfuncv@PLT
; HOIST0-NEXT:    addb %al, (%r15)
; HOIST0-NEXT:    callq _Z5gfuncv@PLT
; HOIST0-NEXT:    addl %eax, (%r12)
; HOIST0-NEXT:    decl %ebx
; HOIST0-NEXT:    jne .LBB1_2
; HOIST0-NEXT:  .LBB1_3: # %while.end
; HOIST0-NEXT:    movl (%r14), %eax
; HOIST0-NEXT:    addq $8, %rsp
; HOIST0-NEXT:    .cfi_def_cfa_offset 40
; HOIST0-NEXT:    popq %rbx
; HOIST0-NEXT:    .cfi_def_cfa_offset 32
; HOIST0-NEXT:    popq %r12
; HOIST0-NEXT:    .cfi_def_cfa_offset 24
; HOIST0-NEXT:    popq %r14
; HOIST0-NEXT:    .cfi_def_cfa_offset 16
; HOIST0-NEXT:    popq %r15
; HOIST0-NEXT:    .cfi_def_cfa_offset 8
; HOIST0-NEXT:    retq
;
; HOIST2-LABEL: _Z2f2i:
; HOIST2:       # %bb.0: # %entry
; HOIST2-NEXT:    pushq %rbp
; HOIST2-NEXT:    .cfi_def_cfa_offset 16
; HOIST2-NEXT:    pushq %r14
; HOIST2-NEXT:    .cfi_def_cfa_offset 24
; HOIST2-NEXT:    pushq %rbx
; HOIST2-NEXT:    .cfi_def_cfa_offset 32
; HOIST2-NEXT:    .cfi_offset %rbx, -32
; HOIST2-NEXT:    .cfi_offset %r14, -24
; HOIST2-NEXT:    .cfi_offset %rbp, -16
; HOIST2-NEXT:    testl %edi, %edi
; HOIST2-NEXT:    je .LBB1_3
; HOIST2-NEXT:  # %bb.1: # %while.body.preheader
; HOIST2-NEXT:    movl %edi, %ebx
; HOIST2-NEXT:    .p2align 4, 0x90
; HOIST2-NEXT:  .LBB1_2: # %while.body
; HOIST2-NEXT:    # =>This Inner Loop Header: Depth=1
; HOIST2-NEXT:    callq _Z5gfuncv@PLT
; HOIST2-NEXT:    movl %eax, %ebp
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    leaq thl_x@TLSGD(%rip), %rdi
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    rex64
; HOIST2-NEXT:    callq __tls_get_addr@PLT
; HOIST2-NEXT:    addl %ebp, (%rax)
; HOIST2-NEXT:    callq _Z5gfuncv@PLT
; HOIST2-NEXT:    movl %eax, %ebp
; HOIST2-NEXT:    leaq _ZZ2f2iE2st.0@TLSLD(%rip), %rdi
; HOIST2-NEXT:    callq __tls_get_addr@PLT
; HOIST2-NEXT:    movq %rax, %r14
; HOIST2-NEXT:    addb %bpl, _ZZ2f2iE2st.0@DTPOFF(%rax)
; HOIST2-NEXT:    callq _Z5gfuncv@PLT
; HOIST2-NEXT:    movl %eax, %ecx
; HOIST2-NEXT:    movq %r14, %rax
; HOIST2-NEXT:    addl %ecx, _ZZ2f2iE2st.1@DTPOFF(%r14)
; HOIST2-NEXT:    decl %ebx
; HOIST2-NEXT:    jne .LBB1_2
; HOIST2-NEXT:  .LBB1_3: # %while.end
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    leaq thl_x@TLSGD(%rip), %rdi
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    rex64
; HOIST2-NEXT:    callq __tls_get_addr@PLT
; HOIST2-NEXT:    movl (%rax), %eax
; HOIST2-NEXT:    popq %rbx
; HOIST2-NEXT:    .cfi_def_cfa_offset 24
; HOIST2-NEXT:    popq %r14
; HOIST2-NEXT:    .cfi_def_cfa_offset 16
; HOIST2-NEXT:    popq %rbp
; HOIST2-NEXT:    .cfi_def_cfa_offset 8
; HOIST2-NEXT:    retq
entry:
  ; Skip the loop when %c is zero.
  %tobool.not9 = icmp eq i32 %c, 0
  br i1 %tobool.not9, label %while.end, label %while.body

while.body:                                       ; preds = %entry, %while.body
  ; Remaining iteration count: %c on entry, decremented once per iteration.
  %c.addr.010 = phi i32 [ %dec, %while.body ], [ %c, %entry ]
  %dec = add nsw i32 %c.addr.010, -1
  ; thl_x += gfunc()
  %call = tail call i32 @_Z5gfuncv()
  %0 = load i32, ptr @thl_x, align 4
  %add = add nsw i32 %0, %call
  store i32 %add, ptr @thl_x, align 4
  ; st.0 += (i8) gfunc()  -- 8-bit update of the first internal TLS variable
  %call1 = tail call i32 @_Z5gfuncv()
  %1 = load i8, ptr @_ZZ2f2iE2st.0, align 4
  %2 = trunc i32 %call1 to i8
  %conv5 = add i8 %1, %2
  store i8 %conv5, ptr @_ZZ2f2iE2st.0, align 4
  ; st.1 += gfunc()  -- 32-bit update of the second internal TLS variable
  %call6 = tail call i32 @_Z5gfuncv()
  %3 = load i32, ptr @_ZZ2f2iE2st.1, align 4
  %add7 = add nsw i32 %3, %call6
  store i32 %add7, ptr @_ZZ2f2iE2st.1, align 4
  %tobool.not = icmp eq i32 %dec, 0
  br i1 %tobool.not, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %entry
  ; Reached from both the loop and the skipped-loop path; reloads @thl_x here.
  %4 = load i32, ptr @thl_x, align 4
  ret i32 %4
}

; External function taking no arguments and returning i32; called three times
; per loop iteration in @_Z2f2i. Its body is not part of this test.
declare i32 @_Z5gfuncv() local_unnamed_addr #1

; Function Attrs: mustprogress uwtable
; @_Z2f3i(i32 %c): straight-line function with no loop; %c is unused. Loads
; @thl_x twice within a single basic block, passing each loaded value to
; @_Z6gfunc2i, and always returns 1.
;
; Expected codegen: both check prefixes expect the same instructions here - a
; single __tls_get_addr call whose result is saved in %rbx and reused for the
; second load of thl_x after the first call to _Z6gfunc2i.
define i32 @_Z2f3i(i32 %c) local_unnamed_addr #0 {
; HOIST0-LABEL: _Z2f3i:
; HOIST0:       # %bb.0: # %entry
; HOIST0-NEXT:    pushq %rbx
; HOIST0-NEXT:    .cfi_def_cfa_offset 16
; HOIST0-NEXT:    .cfi_offset %rbx, -16
; HOIST0-NEXT:    data16
; HOIST0-NEXT:    leaq thl_x@TLSGD(%rip), %rdi
; HOIST0-NEXT:    data16
; HOIST0-NEXT:    data16
; HOIST0-NEXT:    rex64
; HOIST0-NEXT:    callq __tls_get_addr@PLT
; HOIST0-NEXT:    movq %rax, %rbx
; HOIST0-NEXT:    movl (%rax), %edi
; HOIST0-NEXT:    callq _Z6gfunc2i@PLT
; HOIST0-NEXT:    movl (%rbx), %edi
; HOIST0-NEXT:    callq _Z6gfunc2i@PLT
; HOIST0-NEXT:    movl $1, %eax
; HOIST0-NEXT:    popq %rbx
; HOIST0-NEXT:    .cfi_def_cfa_offset 8
; HOIST0-NEXT:    retq
;
; HOIST2-LABEL: _Z2f3i:
; HOIST2:       # %bb.0: # %entry
; HOIST2-NEXT:    pushq %rbx
; HOIST2-NEXT:    .cfi_def_cfa_offset 16
; HOIST2-NEXT:    .cfi_offset %rbx, -16
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    leaq thl_x@TLSGD(%rip), %rdi
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    data16
; HOIST2-NEXT:    rex64
; HOIST2-NEXT:    callq __tls_get_addr@PLT
; HOIST2-NEXT:    movq %rax, %rbx
; HOIST2-NEXT:    movl (%rax), %edi
; HOIST2-NEXT:    callq _Z6gfunc2i@PLT
; HOIST2-NEXT:    movl (%rbx), %edi
; HOIST2-NEXT:    callq _Z6gfunc2i@PLT
; HOIST2-NEXT:    movl $1, %eax
; HOIST2-NEXT:    popq %rbx
; HOIST2-NEXT:    .cfi_def_cfa_offset 8
; HOIST2-NEXT:    retq
entry:
  ; First use of @thl_x: load it and pass the value to the external callee.
  %0 = load i32, ptr @thl_x, align 4
  %call = tail call i32 @_Z6gfunc2i(i32 %0)
  ; Second use of @thl_x, reloaded after the call; the call result is unused.
  %1 = load i32, ptr @thl_x, align 4
  %call1 = tail call i32 @_Z6gfunc2i(i32 %1)
  ret i32 1
}

; Attribute groups: #0 is attached to the three function definitions in this
; file and #1 to the two external declarations.
; NOTE(review): #2 is not referenced by anything in the visible file -
; presumably left over from the original clang output; confirm before removing.
attributes #0 = { nounwind mustprogress uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { uwtable "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

; Module flags. There is deliberately no "tls-load-hoist" entry in this list, so
; the hoisting behavior is controlled only by the llc command line.
!llvm.module.flags = !{!0, !1, !2}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"PIC Level", i32 2}
!2 = !{i32 7, !"uwtable", i32 1}
